#include <linux/module.h>
#include <linux/stacktrace.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/timex.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/rtc.h>
#include <linux/time.h>
#include <linux/version.h>
#include <linux/cpu.h>
#include <linux/tracepoint.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include "include/blackbox.h"
#include "proc.h"

#ifdef CONFIG_X86
/* Not referenced by the code visible in this file. */
#define MAX_SYMBOL_LEN	64
/* Size of the path buffers in struct stack_info and struct vma_info. */
#define PATH_LEN 256
/* Number of user-stack words scanned per captured trace. */
#define STACK_DEPTH 100
/* Number of entries kept in task_info.stack[]. */
#define STACK_DETAIL_DEPTH 20
/* Size of the kernel buffer that receives a procfs write. */
#define PROC_NUMBUF 256
/* Size of the scratch buffer used to format blackbox records. */
#define SHOW_BUF_LEN 64

/* Default thresholds, in jiffies (HZ jiffies), for the wait and lock-hold delays. */
#define WAIT_TIMEOUT HZ
#define LOCK_TIMEOUT HZ

/* Returned negated by ulockcheck_enable() when a probe cannot be registered. */
#define REGISTER_FAILED 1

/* Per-thread records (struct task_info) of the monitored process. */
LIST_HEAD(monitor_list);
/* Executable, file-backed mappings (struct vma_info) of the monitored process. */
LIST_HEAD(vma_list);
/*
 * Guards monitor_list. Defined with DEFINE_RWLOCK so the lock is properly
 * initialised before first use; a bare "rwlock_t x;" is never initialised
 * and trips the lock debugging checks.
 */
DEFINE_RWLOCK(thdlist_lock);
extern struct mm_struct *get_task_mm(struct task_struct *task);

/* Entry probe on futex_wake; the pre-handler is attached in ulockcheck_enable(). */
static struct kprobe kp_wake = {
	.symbol_name = "futex_wake",
};

/* Entry/return probe pair on futex_wait; handlers are attached in ulockcheck_enable(). */
static struct kretprobe krp_wait = {
	.kp.symbol_name = "futex_wait",
	.maxactive = 10000,
};

/* pid (thread or thread-group id) being monitored; 0 means none. */
pid_t monitor_pid = 0;
/* pid of the thread that most recently returned from futex_wait. */
pid_t lock_owner = 0;
/* When true, over-threshold events dump the saved user stack to the blackbox. */
bool enable_print_ustack = false;
/* True while the probes are registered (set by the "enable" proc file). */
bool enbale_ulockcheck = false;
/* Largest wait / lock-hold delay seen so far, in jiffies. */
unsigned long max_wait_time;
unsigned long max_lock_time;
/* Reporting thresholds, in jiffies; writable through procfs. */
int wait_delay_thresold = WAIT_TIMEOUT;
int lock_delay_thresold = LOCK_TIMEOUT;

/* Blackbox id returned by bbox_alloc(); negative until allocated. */
static int ulock_bid = -1;

/* One saved user-stack entry: a stack word that fell inside a recorded mapping. */
struct stack_info {
	/* The matching word read from the user stack (0 means the slot is unused). */
	unsigned long bp;
	/* Path of the mapping the word points into, copied from struct vma_info. */
	char path[PATH_LEN];
};

/* Snapshot of one executable, file-backed mapping of the monitored process. */
struct vma_info{
	/* Link in the global vma_list. */
	struct list_head list;
	/* Copied from vm_start / vm_end of the mapping. */
	unsigned long start;
	unsigned long end;
	/* Never assigned in the visible code; presumably "executable" - TODO confirm. */
	int exectue;
	/* Path of the backing file as produced by get_filename(). */
	char path[PATH_LEN];
};

/* Per-thread futex statistics; linked into monitor_list. */
struct task_info{
	pid_t pid;
	pid_t tgid;
	/* Link in the global monitor_list. */
	struct list_head task_list;
	char comm[TASK_COMM_LEN];

	/* Number of futex_wait entries and the accumulated time spent in them (jiffies). */
	unsigned long fwait_count;
	unsigned long fwait_delay;

	/* Number of futex_wake calls and the jiffies value at the most recent one. */
	unsigned long fwake_count;
	unsigned long fwake_time;

	/* jiffies at the most recent futex_wait entry. */
	unsigned long wait_time;
	/* Number of waits that exceeded wait_delay_thresold. */
	unsigned long outtime_count;

	/* Not referenced by the code visible in this file. */
	unsigned long sch_total;
	unsigned long total_delay;

	/* First argument register (regs->di) captured at futex_wait entry. */
	unsigned long uaddr;

	/* jiffies at the most recent return from futex_wait. */
	unsigned long lock_time;
	/* jiffies between that return and the following futex_wake. */
	unsigned long lock_delay;
	/* Never set in the visible code (the assignments are commented out). */
	bool lock;
	/* Number of returns from futex_wait. */
	unsigned long lock_count;

	//struct list_head vma_list;
	/* User-stack words captured at the most recent futex_wait entry. */
	struct stack_info stack[STACK_DETAIL_DEPTH];
};

void save_mmapstack_trace_user(struct task_struct *task, struct task_info *tsk)
{
	struct list_head *vma_entry;
	const struct pt_regs *regs = task_pt_regs(current);
	const void __user *fp = (const void __user *)regs->sp;
	int stack_len = 0 ;
	int i;

	for (i = 0; i < STACK_DEPTH; i++){
		if (stack_len > STACK_DETAIL_DEPTH)
			break;
		list_for_each(vma_entry, &vma_list){
			//struct vma_info *vma = (struct vma_info *)vma_entry;
			struct vma_info *vma = container_of(vma_entry, struct vma_info, list);
			unsigned long tmp;

			if (!copy_from_user(&tmp, fp+i*__SIZEOF_LONG__, __SIZEOF_LONG__)) {
				if ((tmp >= vma->start) && (tmp <= vma->end)) {
					tsk->stack[stack_len].bp = tmp;
					strcpy(tsk->stack[stack_len].path,vma->path);
					stack_len++;
				}
			}
		}
	}
}

static int save_calltrace(struct pt_regs *regs)
{
	struct list_head *tsk_entry;
	struct task_info *new_tsk;
	pid_t tgid = 0;

	list_for_each(tsk_entry, &monitor_list){
		struct task_info *tsk = container_of(tsk_entry, struct task_info, task_list);
		tgid = tsk->tgid;
		if (tsk->pid == current->pid){
			tsk->fwait_count++;
			tsk->wait_time = jiffies;
			tsk->uaddr = regs->di;
			save_mmapstack_trace_user(current,tsk);
			return 0;
		}
    }
	if (tgid == current->tgid){
		new_tsk = kzalloc(sizeof(struct task_info),GFP_KERNEL);
		if (!new_tsk)
			return 0;
		new_tsk->pid = current->pid;
		new_tsk->tgid = tgid;
		memcpy(new_tsk->comm,current->comm,sizeof(new_tsk->comm));
		new_tsk->fwait_count++;
		new_tsk->wait_time = jiffies;
		new_tsk->uaddr = regs->di;

		save_mmapstack_trace_user(current,new_tsk);
		list_add_tail(&new_tsk->task_list,&monitor_list);
	}
	return 0;
}

/*
 * get_filename - write the path of @path into @buf as a NUL-terminated string.
 * @buf:  destination buffer.
 * @path: path to resolve with d_path().
 * @size: size of @buf in bytes; nothing is done when it is 0.
 *
 * d_path() returns a pointer into @buf rather than its start, so the
 * result is moved down with memmove() (source and destination overlap).
 * On failure @buf is set to the empty string so callers never read
 * uninitialised bytes.
 */
static void get_filename(char *buf, const struct path *path, size_t size)
{
	char *p;
	size_t len;

	if (!size)
		return;

	p = d_path(path, buf, size);
	if (IS_ERR(p)) {
		buf[0] = '\0';
		return;
	}

	len = strnlen(p, size - 1);
	memmove(buf, p, len);
	buf[len] = '\0';
}

/*static int before_futex_wait(struct kprobe *p, struct pt_regs *regs)
{
	int ret;

	if (!monitor_pid || (monitor_pid != current->pid && monitor_pid != current->tgid))
		return 0;

	write_lock(&thdlist_lock);
	ret = save_calltrace(regs);
	write_unlock(&thdlist_lock);
	return 0;
}
*/

static int after_futex_wait(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct list_head *pos;
	unsigned long wait_time;
	int i, len;
	char task_show_buf[SHOW_BUF_LEN];
	struct bbox_data_info data_info;

	if (!monitor_pid || (monitor_pid != current->pid && monitor_pid != current->tgid))
		return 0 ;

	data_info.data = task_show_buf;

	read_lock(&thdlist_lock);
	list_for_each(pos, &monitor_list){
		struct task_info *tsk_info = container_of(pos, struct task_info, task_list);
		if (tsk_info->pid == current->pid){
			tsk_info->fwait_delay += jiffies - tsk_info->wait_time;
			wait_time = jiffies - tsk_info->wait_time;
			max_wait_time = wait_time > max_wait_time ? wait_time : max_wait_time;
			if (wait_time > wait_delay_thresold){
				tsk_info->outtime_count++;
				if (enable_print_ustack){
					len = snprintf(task_show_buf, SHOW_BUF_LEN, "task %d[%s], wait delay %ld ticks,",
						tsk_info->pid, tsk_info->comm, wait_time);
					data_info.size = len;
					bbox_write(ulock_bid, &data_info);
					len = sprintf(task_show_buf,"user stack:\n");
					data_info.size = len;
					bbox_write(ulock_bid, &data_info);
					for (i = 0; i < STACK_DETAIL_DEPTH; i++){
						if (tsk_info->stack[i].bp == 0) {
							continue;
						}
						len = sprintf(task_show_buf, "#~  0x%lx   %s\n",
							tsk_info->stack[i].bp, tsk_info->stack[i].path);
						data_info.size = len;
						bbox_write(ulock_bid, &data_info);
					}
				}
			}
			tsk_info->lock_time = jiffies;
			lock_owner = tsk_info->pid;
			//tsk_info->lock = TRUE;
			tsk_info->lock_count++;
			break;
		}

	}
	read_unlock(&thdlist_lock);
	return 0;
}

static int before_futex_wake(struct kprobe *p, struct pt_regs *regs)
{

	struct list_head *pos;
        char task_show_buf[SHOW_BUF_LEN];
        struct bbox_data_info data_info;
	int len, i;
	
	if (!monitor_pid || (monitor_pid != current->pid && monitor_pid != current->tgid))
		return 0;
	data_info.data = task_show_buf;
	read_lock(&thdlist_lock);
	list_for_each(pos, &monitor_list){
		struct task_info *tsk_info = container_of(pos, struct task_info, task_list);
		if (tsk_info->pid == current->pid){
			//pos->fw_cout++;
			tsk_info->lock_delay = jiffies - tsk_info->lock_time;
			max_lock_time = tsk_info->lock_delay > max_lock_time ? tsk_info->lock_delay : max_lock_time;
			if (enable_print_ustack && tsk_info->lock && (tsk_info->lock_delay > lock_delay_thresold)){
				len = snprintf(task_show_buf, SHOW_BUF_LEN, "task %d[%s], lock over %ld ticks,",
						current->pid,current->comm, tsk_info->lock_delay);
				data_info.size = len;
				bbox_write(ulock_bid, &data_info);
				len = sprintf(task_show_buf,"user stack:\n");
				data_info.size = len;
				bbox_write(ulock_bid, &data_info);
				for (i = 0; i < STACK_DETAIL_DEPTH; i++){
					if (tsk_info->stack[i].bp == 0) {
						continue;
					}
					len = sprintf(task_show_buf, "#~  0x%lx   %s\n",
						tsk_info->stack[i].bp, tsk_info->stack[i].path);
					data_info.size = len;
					bbox_write(ulock_bid, &data_info);
				}
			}

			//tsk_info->lock = FALSE;
			tsk_info->fwake_time = jiffies;
			tsk_info->fwake_count++;
			break;
		}

	}
	read_unlock(&thdlist_lock);
	return 0;
}

/*
 * entry_handler - kretprobe entry handler for futex_wait.
 *
 * Records the wait start for the current thread when it belongs to the
 * monitored pid/tgid. Always returns 0 so the return handler is run.
 */
static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	if (!monitor_pid || (monitor_pid != current->pid && monitor_pid != current->tgid))
		return 0;

	/* save_calltrace() may add a record, hence the write lock. */
	write_lock(&thdlist_lock);
	save_calltrace(regs);
	write_unlock(&thdlist_lock);
	return 0;
}


/*static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
{
	pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr);
	return 0;
}
*/


static int futexpid_show(struct seq_file *m, void *v)
{
	struct list_head *pos;

	if (!monitor_pid) {
		seq_printf(m, "futex monitor list is empty\n");
		return 0;
	}

	seq_printf(m, "max_wait_time %ld ticks, max_lock_time %ld ticks\n", 
			max_wait_time, max_lock_time);
	read_lock(&thdlist_lock);
	list_for_each(pos, &monitor_list){
		struct task_info *tsk = container_of(pos, struct task_info, task_list);
		if (lock_owner && (tsk->pid == lock_owner))
			seq_puts(m,"current owner:\n");
		seq_printf(m, "pid[%d],name[%s],futex wait count[%lu],total futex_delay[%lu],",
				tsk->pid, tsk->comm, tsk->fwait_count, tsk->fwait_delay);
		seq_printf(m, "futex lock count[%lu],lock delay[%lu],wait over thresold count[%lu]\n",
				tsk->lock_count, tsk->lock_delay, tsk->outtime_count);
		//seq_printf(m,"schdule delay[none], ratio ：futex[none]/schdule[none]\n",
				//tsk->fwait_delay, tsk->fwait_delay, tsk->fwait_delay);

	}
	read_unlock(&thdlist_lock);
	bbox_ring_show(m, ulock_bid);
	return 0;
}

static ssize_t futexpid_store(void *priv, const char __user *buf, size_t count)
{
	char buffer[PROC_NUMBUF];
	struct task_struct *tsk;
	struct task_info *new_tsk;
	struct mm_struct *mm;
	struct file *vma_file;
	struct vm_area_struct *vma;
	struct vma_info *new_vma;
	struct pid *pid;
	pid_t pid_i;
	int err = -1;

	if (!enbale_ulockcheck){
		pr_warn("ulockcheck disabled!");
		return count;
	}

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count)) {
		return -EFAULT;;
	}
	err = kstrtoint(strstrip(buffer), 0, &pid_i);
	if (err)
		return -EINVAL;
	read_lock(&thdlist_lock);

	if (!list_empty(&monitor_list)){
		read_unlock(&thdlist_lock);
		return count;
	}
	read_unlock(&thdlist_lock);
	
   	rcu_read_lock();
	
	pid= find_get_pid(pid_i);
	tsk = pid_task(pid, PIDTYPE_PID);
    if (!tsk || !(tsk->mm)){
       	rcu_read_unlock(); 
		return -EINVAL;
	}

	monitor_pid = pid_i;
	
	if (monitor_pid != 0 ){

		new_tsk = kzalloc(sizeof(struct task_info),GFP_KERNEL);
		if (!new_tsk)
			goto failed_tsk;
		new_tsk->pid = monitor_pid;
    	new_tsk->tgid = tsk->tgid;
		memcpy(new_tsk->comm,tsk->comm,sizeof(tsk->comm));

		mm = get_task_mm(tsk);

		if (IS_ERR_OR_NULL(mm)){
			rcu_read_unlock();
			goto failed;
		}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
		if (!mmap_read_trylock(mm)){
#else
		if (!down_read_trylock(&mm->mmap_sem)){
#endif
			rcu_read_unlock();
			goto failed;
		}

		for (vma = mm->mmap; vma; vma = vma->vm_next){
			if (vma->vm_file && vma->vm_flags & VM_EXEC){
				char buff[PATH_LEN];

				new_vma = kzalloc(sizeof(struct vma_info),GFP_KERNEL);
				if (!new_vma){
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
					mmap_read_unlock(mm);
#else
					up_read(&mm->mmap_sem);
#endif
					goto failed;
				}
				new_vma->start = vma->vm_start;
				new_vma->end = vma->vm_end;
				vma_file = vma->vm_file;

				if (vma_file){
					get_filename(buff, &vma_file->f_path, PATH_LEN);
					strcpy(new_vma->path, buff);
					list_add_tail(&new_vma->list,&vma_list);
				}
			}
		}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 10, 0)
		mmap_read_unlock(mm);
#else
		up_read(&mm->mmap_sem);
#endif
		write_lock(&thdlist_lock);
		list_add_tail(&new_tsk->task_list, &monitor_list);
		write_unlock(&thdlist_lock);
	}
	rcu_read_unlock();
	return count;

failed:
	kfree(new_tsk);
failed_tsk:
	rcu_read_unlock();
	monitor_pid = 0;
	return -ENOMEM;
}

DEFINE_PROC_ATTRIBUTE_RW(futexpid);

/* Report whether user-stack dumping is enabled, as "0" or "1". */
static int futexprint_show(struct seq_file *m, void *v)
{
	const int enabled = (int)enable_print_ustack;

	seq_printf(m, "%d\n", enabled);
	return 0;
}

/*
 * futexprint_store - procfs write handler toggling user-stack dumping.
 * @priv:  unused.
 * @buf:   user buffer holding "0" or "1" as text.
 * @count: number of bytes in @buf.
 *
 * Returns @count on success, -EFAULT if @buf cannot be read and -EINVAL if
 * the text is not an integer. Integers other than 0 and 1 are accepted
 * and leave the setting unchanged.
 */
static ssize_t futexprint_store(void *priv, const char __user *buf, size_t count)
{
	char buffer[PROC_NUMBUF];
	int val;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;

	/* val is only defined when parsing succeeds. */
	if (kstrtoint(strstrip(buffer), 0, &val))
		return -EINVAL;

	if (val == 1)
		enable_print_ustack = true;
	else if (val == 0)
		enable_print_ustack = false;
	return count;
}

DEFINE_PROC_ATTRIBUTE_RW(futexprint);

static int ulockcheck_enable(void)
{
	int ret_wake, ret_wait;

	kp_wake.pre_handler = before_futex_wake;

	krp_wait.handler = after_futex_wait;
	krp_wait.entry_handler = entry_handler;

	ret_wake = register_kprobe(&kp_wake);
	if (ret_wake < 0) {
		pr_err("register_kprobe failed, returned %d\n", ret_wake);
		return -REGISTER_FAILED;
	}

	ret_wait = register_kretprobe(&krp_wait);
	if (ret_wait < 0) {
		pr_err("register_kretprobe failed, returned %d\n", ret_wait);
		unregister_kprobe(&kp_wake);
		return -REGISTER_FAILED;
	}
	pr_info("Planted return probe at %s: %p\n",
			krp_wait.kp.symbol_name, krp_wait.kp.addr);
	pr_info("Planted kprobe futex_wake at %p\n", kp_wake.addr);

	ulock_bid = bbox_alloc("ulockcheck", BBOX_TYPE_RING);
	if (ulock_bid < 0) {
		printk("bbox alloc failed,cannot enable\n");
		unregister_kprobe(&kp_wake);
		unregister_kretprobe(&krp_wait);
		return -ENOMEM;
	}

	return 0;
}

void ulockcheck_disable(void)
{
	unregister_kprobe(&kp_wake);
	unregister_kretprobe(&krp_wait);

	pr_info("kprobe futex_wake at %p unregistered\n", kp_wake.addr);
	pr_info("kretprobe futex_wait at %p unregistered\n", krp_wait.kp.addr);
	/* nmissed > 0 suggests that maxactive was set too low. */
	pr_info("Missed probing %d instances of %s\n",
		krp_wait.nmissed, krp_wait.kp.symbol_name);

	bbox_free(ulock_bid);
}

/* Report whether futex checking is enabled, as "0" or "1". */
static int futexenable_show(struct seq_file *m, void *v)
{
	const int enabled = (int)enbale_ulockcheck;

	seq_printf(m, "%d\n", enabled);
	return 0;
}

/*
 * futexenable_store - procfs write handler enabling/disabling futex checking.
 * @priv:  unused.
 * @buf:   user buffer holding "0" or "1" as text.
 * @count: number of bytes in @buf.
 *
 * Writing 1 arms the probes and writing 0 removes them; a request that
 * matches the current state is a no-op. Other integers are accepted and
 * ignored.
 *
 * Returns @count on success, -EFAULT if @buf cannot be read, -EINVAL if
 * the text is not an integer, or the error from ulockcheck_enable().
 */
static ssize_t futexenable_store(void *priv, const char __user *buf, size_t count)
{
	char buffer[PROC_NUMBUF];
	int val;
	int err;

	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count))
		return -EFAULT;

	/* val is only defined when parsing succeeds. */
	if (kstrtoint(strstrip(buffer), 0, &val))
		return -EINVAL;

	if (val == 1) {
		if (!enbale_ulockcheck) {
			err = ulockcheck_enable();
			if (err)
				return err;
			enbale_ulockcheck = true;
		}
	} else if (val == 0) {
		if (enbale_ulockcheck) {
			ulockcheck_disable();
			enbale_ulockcheck = false;
		}
	}
	return count;
}

DEFINE_PROC_ATTRIBUTE_RW(futexenable);

/* Print the current wait-delay threshold as a decimal integer. */
static int wait_delaythresold_show(struct seq_file *m, void *v)
{
	const int threshold = wait_delay_thresold;

	seq_printf(m, "%d\n", threshold);
	return 0;
}

/*
 * Procfs write handler for the wait-delay threshold.
 * Parses an integer from @buf and stores it in wait_delay_thresold.
 * Returns @count, -EFAULT if @buf cannot be read, or -EINVAL if the text
 * is not an integer.
 */
static ssize_t wait_delaythresold_store(void *priv, const char __user *buf, size_t count)
{
	char kbuf[PROC_NUMBUF] = { 0 };
	int threshold;

	/* Leave room for the terminating NUL. */
	if (count >= sizeof(kbuf))
		count = sizeof(kbuf) - 1;

	if (copy_from_user(kbuf, buf, count))
		return -EFAULT;

	if (kstrtoint(strstrip(kbuf), 0, &threshold) != 0)
		return -EINVAL;

	wait_delay_thresold = threshold;
	return count;
}

DEFINE_PROC_ATTRIBUTE_RW(wait_delaythresold);

/* Print the current lock-delay threshold as a decimal integer. */
static int lock_delaythresold_show(struct seq_file *m, void *v)
{
	const int threshold = lock_delay_thresold;

	seq_printf(m, "%d\n", threshold);
	return 0;
}

/*
 * Procfs write handler for the lock-delay threshold.
 * Parses an integer from @buf and stores it in lock_delay_thresold.
 * Returns @count, -EFAULT if @buf cannot be read, or -EINVAL if the text
 * is not an integer.
 */
static ssize_t lock_delaythresold_store(void *priv, const char __user *buf, size_t count)
{
	char kbuf[PROC_NUMBUF] = { 0 };
	int threshold;

	/* Leave room for the terminating NUL. */
	if (count >= sizeof(kbuf))
		count = sizeof(kbuf) - 1;

	if (copy_from_user(kbuf, buf, count))
		return -EFAULT;

	if (kstrtoint(strstrip(kbuf), 0, &threshold) != 0)
		return -EINVAL;

	lock_delay_thresold = threshold;
	return count;
}

DEFINE_PROC_ATTRIBUTE_RW(lock_delaythresold);


/*
 * ulockcheck_init - create the "ulockcheck" proc directory and its entries.
 *
 * Returns 0 on success and -1 if the directory or any entry cannot be
 * created; in the latter case the directory is removed again.
 */
int ulockcheck_init(void)
{
	struct proc_dir_entry *dir = sysak_proc_mkdir("ulockcheck");

	if (!dir)
		return -1;

	/* Short-circuit evaluation stops at the first entry that fails. */
	if (!proc_create("enable", 0644, dir, &futexenable_fops) ||
	    !proc_create("enable_print_ustack", 0644, dir, &futexprint_fops) ||
	    !proc_create("ulockcheck_pid", 0644, dir, &futexpid_fops) ||
	    !proc_create("wait_delaythresold", 0644, dir, &wait_delaythresold_fops) ||
	    !proc_create("lock_delaythresold", 0644, dir, &lock_delaythresold_fops)) {
		sysak_remove_proc_entry("ulockcheck");
		return -1;
	}

	return 0;
}

int ulockcheck_exit(void)
{
	struct list_head *tsk_entry;
	struct list_head *vma_entry;
	struct list_head *tsk_prev;
	struct list_head *vma_prev;

	if (!monitor_pid)
		return 0;

	if (enbale_ulockcheck)
		ulockcheck_disable();

	list_for_each(tsk_entry, &monitor_list){
		struct task_info *tsk = container_of(tsk_entry, struct task_info, task_list);
		tsk_prev = tsk_entry->prev;

		list_del(tsk_entry);
		kfree(tsk);
		tsk_entry = tsk_prev;
	}

	list_for_each(vma_entry, &vma_list){
		struct vma_info *vma = container_of(vma_entry, struct vma_info, list);
		vma_prev = vma_entry->prev;

		list_del(vma_entry);
		kfree(vma);
		vma_entry = vma_prev;
	}
	return 0;
}
#endif
